# ================================================================================== #
# Replication code for: Vote Method and Confidence in Elections                      #
# Authors: Enrijeta Shino and Daniel A. Smith                                        # 
# Journal: Political Research Quarterly                                              #
# Year: 2025                                                                         #
# ===================================================================================#

# The workspace is intentionally NOT wiped with rm(list = ls()): that call
# deletes the caller's objects when this script is source()d, yet it leaves
# attached packages and options untouched, so it does not produce a clean
# state anyway. For a clean replication, run this script in a fresh R session.

# Libraries
library(data.table)
library(survey)
library(questionr)
library(weights)
library(SortedEffects)
library(sandwich)
library(lmtest)
library(ggplot2)
library(ggeffects)
library(ggpubr)
library(texreg)
library(tidyverse)
library(dplyr)
library(descr)
library(MASS)



# Read the pre-election dataset from the working directory. load() restores
# whatever objects were saved in the file; the recodes below expect one of them
# to be named `flsurvey_weighted_vh`.
load(file = "flsurvey_weighted_vh.RData")


#===================#
# Variable recoding #
#===================#

# Voter confidence in the 2020 election (pre-election wave):
# 4 = very confident; 3 = somewhat confident; 2 = not too confident;
# 1 = not at all confident. Missing answers stay missing.
# NOTE(review): every non-missing label other than the three named below is
# coded 1 -- this assumes VOTECONF has no further categories; confirm.
flsurvey_weighted_vh$voter_confidence_pre <- with(
  flsurvey_weighted_vh,
  dplyr::case_when(
    is.na(VOTECONF) ~ NA_real_,
    VOTECONF == "Very confident" ~ 4,
    VOTECONF == "Somewhat confident" ~ 3,
    VOTECONF == "Not too confident" ~ 2,
    TRUE ~ 1
  )
)

table(flsurvey_weighted_vh$voter_confidence_pre)


# Vote intent November 2020: 1 = Trump; 0 = any other non-missing answer
# (Biden/other/unsure). The logical comparison is converted to 0/1, so a
# missing VOTE20 stays NA.
flsurvey_weighted_vh$vote20_trump_pre <- as.numeric(
  flsurvey_weighted_vh$VOTE20 == "Donald J. Trump (R)"
)
table(flsurvey_weighted_vh$vote20_trump_pre)


# Follow news about politics: 1 = none at all ---> 5 = a great deal.
# The score is the position of the answer in the ordered label vector; any
# other answer (or a missing one) has no position and therefore stays NA.
flsurvey_weighted_vh$political_awareness <- as.numeric(match(
  as.character(flsurvey_weighted_vh$NEWS),
  c("None at all", "A little", "A moderate amount", "A lot", "A great deal")
))
table(flsurvey_weighted_vh$political_awareness)


# 2016 General Election vote method from the voter file (column gen16.x):
# 1 = VBM (codes A, B); 2 = ED (codes Y, P); 3 = EIP (code E);
# 4 = any other non-missing code (treated as nonvoter). Missing stays NA.
flsurvey_weighted_vh$gen16 <- with(
  flsurvey_weighted_vh,
  dplyr::case_when(
    is.na(gen16.x) ~ NA_real_,
    gen16.x %in% c("A", "B") ~ 1,
    gen16.x %in% c("Y", "P") ~ 2,
    gen16.x == "E" ~ 3,
    TRUE ~ 4
  )
)

table(flsurvey_weighted_vh$gen16)
flsurvey_weighted_vh$gen16 <- factor(flsurvey_weighted_vh$gen16)


# Voted 2016, validated: 1 = voted (methods 1-3); 0 = didn't vote (method 4)
flsurvey_weighted_vh$voted16_vf <- as.numeric(flsurvey_weighted_vh$gen16 != 4)
table(flsurvey_weighted_vh$voted16_vf)


# 2018 General Election vote method from the voter file (column gen18.x):
# 1 = VBM; 2 = ED; 3 = EIP; 4 = nonvoters
# NOTE(review): unlike the 2016 recode, this works on as.numeric(gen18.x);
# presumably gen18.x is a factor and 2/3, 6/7 and 4 are its integer level
# codes for the mail, election-day and early-voting labels -- confirm.
flsurvey_weighted_vh$gen18.x1 <- local({
  code18 <- as.numeric(flsurvey_weighted_vh$gen18.x)
  # Missing history gets the sentinel 99 so it falls into the nonvoter group
  replace(code18, is.na(code18), 99)
})

flsurvey_weighted_vh$gen18 <- with(
  flsurvey_weighted_vh,
  dplyr::case_when(
    gen18.x1 %in% c(2, 3) ~ 1,
    gen18.x1 %in% c(6, 7) ~ 2,
    gen18.x1 == 4 ~ 3,
    TRUE ~ 4
  )
)

table(flsurvey_weighted_vh$gen18)
flsurvey_weighted_vh$gen18 <- factor(flsurvey_weighted_vh$gen18)


# Voted 2018, validated: 1 = voted (methods 1-3); 0 = didn't vote (method 4)
flsurvey_weighted_vh$voted18_vf <- as.numeric(flsurvey_weighted_vh$gen18 != 4)
table(flsurvey_weighted_vh$voted18_vf)



# Past vote method, 2016 and 2018 combined:
# 1 = VBM voter in both elections;
# 2 = in-person (ED or EIP) voter in both elections;
# 3 = vote method varied between the two elections;
# non-voters in 2016 and/or 2018: dropped (NA)

# Copies of the yearly method factors with the nonvoter level (4) blanked out
flsurvey_weighted_vh$gen161 <- replace(
  flsurvey_weighted_vh$gen16,
  which(flsurvey_weighted_vh$gen16 == 4),
  NA
)
flsurvey_weighted_vh$gen181 <- replace(
  flsurvey_weighted_vh$gen18,
  which(flsurvey_weighted_vh$gen18 == 4),
  NA
)


flsurvey_weighted_vh$past_vmethod_vf <- with(
  flsurvey_weighted_vh,
  dplyr::case_when(
    # a missing method in either year leaves the combined measure missing
    is.na(gen161) | is.na(gen181) ~ NA_real_,
    gen161 == 1 & gen181 == 1 ~ 1,
    # ED (2) and EIP (3) both count as in-person
    gen161 %in% c(2, 3) & gen181 %in% c(2, 3) ~ 2,
    TRUE ~ 3
  )
)
table(flsurvey_weighted_vh$past_vmethod_vf)

flsurvey_weighted_vh$past_vmethod_vf <- factor(flsurvey_weighted_vh$past_vmethod_vf)



# 2020 General Election vote method from the voter file (VoteHistoryCode):
# 0 = VBM (codes A, B); 1 = ED/EIP (codes Y, E); everything else dropped (NA)
flsurvey_weighted_vh$vmethod_gen20_vf <- with(
  flsurvey_weighted_vh,
  dplyr::case_when(
    VoteHistoryCode %in% c("A", "B") ~ 0,
    VoteHistoryCode %in% c("Y", "E") ~ 1
  )
)

table(flsurvey_weighted_vh$vmethod_gen20_vf)



# Same measure, but keeping nonvoters:
# 0 = VBM; 1 = ED/EIP; 4 = any other non-missing code (nonvoters).
# A missing VoteHistoryCode stays NA.
flsurvey_weighted_vh$vmethod_gen20_vf1 <- with(
  flsurvey_weighted_vh,
  dplyr::case_when(
    is.na(VoteHistoryCode) ~ NA_real_,
    VoteHistoryCode %in% c("A", "B") ~ 0,
    VoteHistoryCode %in% c("Y", "E") ~ 1,
    TRUE ~ 4
  )
)

table(flsurvey_weighted_vh$vmethod_gen20_vf1)


# Voted 2020, validated: 1 = voted; 0 = didn't vote
flsurvey_weighted_vh$voted20_vf <- as.numeric(
  flsurvey_weighted_vh$vmethod_gen20_vf1 != 4
)
table(flsurvey_weighted_vh$voted20_vf)



# Gender from the voter file: 1 = female ("F"), 0 = male ("M").
# Named-vector lookup: any other or missing code has no entry and becomes NA.
flsurvey_weighted_vh$female <- unname(
  c(M = 0, F = 1)[as.character(flsurvey_weighted_vh$Gender)]
)

table(flsurvey_weighted_vh$female)


# Age: 1 = 18-24; 2 = 25-34; 3 = 35-44; 4 = 45-54; 5 = 55-64; 6 = 65+
# Numeric codes 4..9 of AGE are shifted down to 1..6; any other code is NA.
# NOTE(review): presumably AGE is a factor whose level codes 4-9 are the six
# age brackets above -- confirm against the codebook.
flsurvey_weighted_vh$age <- local({
  age_level <- as.numeric(flsurvey_weighted_vh$AGE)
  ifelse(age_level %in% 4:9, age_level - 3, NA)
})
table(flsurvey_weighted_vh$age)


# Education: 1 = high school or less, 2 = some college, 3 = college degree,
# 4 = graduate degree
# Lookup from the numeric code of EDU to the four-point scale; codes without
# an entry (and missing values) become NA.
# NOTE(review): presumably EDU is a factor and 4-9 are its level codes -- confirm.
flsurvey_weighted_vh$education <- local({
  edu_scale <- c("6" = 1, "7" = 1, "8" = 2, "4" = 3, "9" = 3, "5" = 4)
  edu_level <- as.numeric(flsurvey_weighted_vh$EDU)
  unname(edu_scale[as.character(edu_level)])
})
table(flsurvey_weighted_vh$education)


# Race from the voter file: 1 = white (code "5"), 2 = black (code "3"),
# 3 = hispanic (code "4"), 4 = other (any remaining non-missing code)
flsurvey_weighted_vh$race_voterfile <- with(flsurvey_weighted_vh, {
  is_white <- Race == "5"
  is_black <- Race == "3"
  is_hispanic <- Race == "4"
  # The three indicators are mutually exclusive: start from "other" (4) and
  # subtract the offset of whichever group matches. NA in Race propagates.
  4 - 3 * is_white - 2 * is_black - 1 * is_hispanic
})
table(flsurvey_weighted_vh$race_voterfile)


# Race: 1 = white, 0 = other
flsurvey_weighted_vh$white <- as.numeric(flsurvey_weighted_vh$race_voterfile == 1)
table(flsurvey_weighted_vh$white)


# Ideology: 1 Very liberal ---> 7 Very conservative
# The labels are matched exactly as written here, i.e. INCLUDING the trailing
# space each one carries; answers that match none of them become NA.
flsurvey_weighted_vh$ideo7 <- local({
  ideo_scale <- c(
    "Very liberal " = 1,
    "Liberal " = 2,
    "Slightly liberal " = 3,
    "Moderate, middle of the road " = 4,
    "Slightly conservative " = 5,
    "Conservative " = 6,
    "Very conservative " = 7
  )
  unname(ideo_scale[as.character(flsurvey_weighted_vh$IDEO)])
})

table(flsurvey_weighted_vh$ideo7, useNA = "ifany")


# Party registration: 1 = Dem, 2 = Rep, 3 = NPA; any other party is NA.
# The code is the position of the registration string in the label vector.
flsurvey_weighted_vh$pid <- as.numeric(match(
  as.character(flsurvey_weighted_vh$PartyAffiliation),
  c("DEM", "REP", "NPA")
))
table(flsurvey_weighted_vh$pid)
flsurvey_weighted_vh$pid <- factor(flsurvey_weighted_vh$pid)

# Two-party version: NPAs (level 3) are set to NA, e.g. for primary elections
flsurvey_weighted_vh$pid2 <- replace(
  flsurvey_weighted_vh$pid,
  which(flsurvey_weighted_vh$pid == 3),
  NA
)
table(flsurvey_weighted_vh$pid2)




#=======================#
# post-election dataset #
#=======================#

# Load the saved pre/post panel; the code below expects it to provide an
# object named `prepost_dta`.
# NOTE(review): the merge that built this file is commented out below, so the
# pre-election recodes created above (voter_confidence_pre, past_vmethod_vf,
# vmethod_gen20_vf, ...) must already be stored in the saved object -- confirm.
load(file = "prepost_dta.Rdata")

# The panel was built by merging the pre- and post-election surveys and keeping
# only respondents who appear in both:
# prepost_dta <- merge(flsurvey_weighted_vh, merged_flsurvey, by= "ExternalReference")


#==============================#
# recode post-election dataset #
#==============================#

# Self-reported 2020 vote method (post-election wave)
# 0 = VBM (answer 1); 1 = ED/EIP (answers 2 and 3); any other answer,
# including unsure, is dropped (NA)
prepost_dta$vmethod_gen20post_r <- c(0, 1, 1)[
  match(prepost_dta$vote20_method_w2, 1:3)
]

table(prepost_dta$vmethod_gen20post_r)


# Vote count confidence, reverse-coded so that higher = more confident:
# 1 = not confident ---> 5 = very confident
prepost_dta$votes_count_w21 <- 6 - prepost_dta$votes_count_w2
table(prepost_dta$votes_count_w21)

# Four-category confidence comparable to the pre-election item: the midpoint
# (3, "neither/nor") is dropped and the two categories above it move down by one
prepost_dta$voter_confidence_post <- local({
  five_point <- prepost_dta$votes_count_w21
  four_point <- five_point - (five_point %in% c(4, 5))  # 4 -> 3, 5 -> 4
  four_point[which(five_point == 3)] <- NA              # midpoint dropped
  four_point
})
table(prepost_dta$voter_confidence_post)


# Change in confidence: post-election minus pre-election score
prepost_dta$confidenc_change <- with(
  prepost_dta,
  voter_confidence_post - voter_confidence_pre
)
table(prepost_dta$confidenc_change)

# Direction of the change only: -1 = dropped; 0 = didn't change; 1 = increased
prepost_dta$confidenc_change3 <- sign(prepost_dta$confidenc_change)
table(prepost_dta$confidenc_change3)


# Vote method trajectory, general elections 2016-2018 -> 2020:
# 1: VBM in both            --> VBM 2020
# 2: in-person in both      --> VBM 2020
# 3: varying vote method    --> VBM 2020
#
# 4: VBM in both            --> in-person 2020
# 5: in-person in both      --> in-person 2020
# 6: varying vote method    --> in-person 2020

prepost_dta$vmethod_prepost <- with(prepost_dta, {
  past_code <- as.numeric(as.character(past_vmethod_vf))
  # Past method (1-3) plus an offset of 3 for in-person voting in 2020;
  # anything missing or outside the expected codes is NA
  ifelse(past_code %in% 1:3 & vmethod_gen20_vf %in% 0:1,
         past_code + 3 * vmethod_gen20_vf,
         NA)
})

table(prepost_dta$vmethod_prepost)
prepost_dta$vmethod_prepost <- factor(prepost_dta$vmethod_prepost)



#========================# 
# Table 1: Descriptives  #
#========================# 

# Weighted column percentages of pre-election voter confidence by vote method

# pre-election confidence by past (2016/2018) vote method
crosstab(prepost_dta$voter_confidence_pre, prepost_dta$past_vmethod_vf,
         weight = prepost_dta$weights, prop.c = TRUE)

# pre-election confidence by validated 2020 vote method
crosstab(prepost_dta$voter_confidence_pre, prepost_dta$vmethod_gen20_vf,
         weight = prepost_dta$weights, prop.c = TRUE)


#=======================================================# 
# Table 2: Pre-election confidence and Past vote method #
#=======================================================# 

# Convert to a data.table so rows can be filtered by column name inside `[ ]`
prepost_dta <- data.table(prepost_dta)

# Model 1: OLS of pre-election confidence on validated past vote method,
# with county fixed effects, among validated 2020 voters only
m1_pre <- lm(
  formula = voter_confidence_pre ~ past_vmethod_vf + vote20_trump_pre +
    political_awareness + pid + ideo7 + age + female + white + education +
    factor(county),
  data = prepost_dta[voted20_vf == 1]
)
summary(m1_pre)

# Coefficient tests using a county-clustered covariance matrix
vcov_county1 <- sandwich::vcovCL(m1_pre, cluster = ~county)
m1_prer <- coeftest(m1_pre, vcov. = vcov_county1)




#==============#
# Model Output #
#==============#

# Coefficient labels for the table; terms not listed here (e.g. the county
# fixed effects) are left out of the printed table
map <- list("(Intercept)" = "(Intercept)",
            "past_vmethod_vf2" = "In-person",
            "past_vmethod_vf3" = "Different vote methods",
            "vote20_trump_pre" = "Trump supporter",
            "political_awareness" = "Political awareness",
            "pid2" = "Republican",
            "pid3" = "No party affiliate",
            "ideo7" = "Ideology",
            "age" = "Age",
            "female" = "Female",
            "white" = "White",
            "education" = "Education")

# Specify list of models (coeftest object with county-clustered SEs)
models <- list(m1_prer)

# Specify caption
caption <- "Voter Confidence and Past Vote Method in the 2016 and 2018 General Elections, Pre-2020 General Election Survey"

# Write the LaTeX table.
# override.se / override.pvalues are two separate texreg arguments, each taking
# one vector per model. A coeftest matrix has the columns
# 1 = estimate, 2 = std. error, 3 = test statistic, 4 = p-value, so the
# clustered standard errors are column 2 and their p-values are column 4.
texreg(models,
       file = "/Users/Enrijeta/Dropbox/Apps/Overleaf/Vote Method and Confidence/prq_rr_tables/table2.tex",
       caption = caption,
       caption.above = TRUE,

       custom.gof.rows = list(

         "County FE" = c("Yes"),

         # goodness-of-fit rows are taken from the underlying lm fit
         "Num. obs." = c(nobs(m1_pre)),

         "R-squared" = c(summary(m1_pre)$r.squared),

         "Adjusted R-squared" = c(summary(m1_pre)$adj.r.squared)
       ),

       custom.coef.map = map,
       include.nobs = TRUE,
       custom.note = "%stars. standard errors in parentheses",
       dcolumn = TRUE,
       omit.coef = "county",
       custom.model.names = c("Model 1 (validated)"),
       digits = 3,
       single.row = FALSE,
       override.se = list(m1_prer[, 2]),
       override.pvalues = list(m1_prer[, 4]))




#================================================================# 
# Table 3: Pre and Post-election confidence and 2020 vote method #
#================================================================# 


# All four models are OLS with county fixed effects, estimated among validated
# 2020 voters, with coefficient tests based on county-clustered covariances.

## Model 1: pre-election confidence, validated 2020 vote method
m1_pre_vm <- lm(
  formula = voter_confidence_pre ~ vmethod_gen20_vf + vote20_trump_pre +
    political_awareness + pid + ideo7 + age + female + white + education +
    factor(county),
  data = prepost_dta[voted20_vf == 1]
)
summary(m1_pre_vm)

vcov_county2 <- sandwich::vcovCL(m1_pre_vm, cluster = ~county)
m1_pre_vmr <- coeftest(m1_pre_vm, vcov. = vcov_county2)



## Model 2: pre-election confidence, 2020 vote method x Trump support
m1_pre_vm_int <- lm(
  formula = voter_confidence_pre ~ vmethod_gen20_vf*vote20_trump_pre +
    political_awareness + pid + ideo7 + age + female + white + education +
    factor(county),
  data = prepost_dta[voted20_vf == 1]
)
summary(m1_pre_vm_int)

# (reuses the name vcov_county2; Model 1's tests are already stored above)
vcov_county2 <- sandwich::vcovCL(m1_pre_vm_int, cluster = ~county)
m1_pre_vmr_int <- coeftest(m1_pre_vm_int, vcov. = vcov_county2)



## Model 3: post-election confidence, validated 2020 vote method
m2_post <- lm(
  formula = voter_confidence_post ~ vmethod_gen20_vf + vote20_trump_pre +
    political_awareness + pid + ideo7 + age + female + white + education +
    factor(county),
  data = prepost_dta[voted20_vf == 1]
)
summary(m2_post)

vcov_county3 <- sandwich::vcovCL(m2_post, cluster = ~county)
m2_postr <- coeftest(m2_post, vcov. = vcov_county3)



## Model 4: post-election confidence, 2020 vote method x Trump support
m2_post_int <- lm(
  formula = voter_confidence_post ~ vmethod_gen20_vf*vote20_trump_pre +
    political_awareness + pid + ideo7 + age + female + white + education +
    factor(county),
  data = prepost_dta[voted20_vf == 1]
)
summary(m2_post_int)

# (reuses the name vcov_county3; Model 3's tests are already stored above)
vcov_county3 <- sandwich::vcovCL(m2_post_int, cluster = ~county)
m2_postr_int <- coeftest(m2_post_int, vcov. = vcov_county3)



#==============#
# Model Output #
#==============#


# Coefficient labels for the table; terms not listed here (e.g. the county
# fixed effects) are left out of the printed table
map <- list("(Intercept)" = "(Intercept)",
            "vmethod_gen20_vf" = "In-person",
            "vote20_trump_pre" = "Trump supporter",
            "vmethod_gen20_vf:vote20_trump_pre" = "In-personXTrump supporter",
            "political_awareness" = "Political awareness",
            "pid2" = "Republican",
            "pid3" = "No party affiliate",
            "ideo7" = "Ideology",
            "age" = "Age",
            "female" = "Female",
            "white" = "White",
            "education" = "Education")

# Specify list of models (coeftest objects with county-clustered SEs)
models <- list(m1_pre_vmr, m1_pre_vmr_int, m2_postr, m2_postr_int)

# Specify caption
caption <- "Voter Confidence and Vote Method in the 2020 General Elections"

# Write the LaTeX table.
# override.se / override.pvalues are two separate texreg arguments, each taking
# one vector per model. A coeftest matrix has the columns
# 1 = estimate, 2 = std. error, 3 = test statistic, 4 = p-value, so the
# clustered standard errors are column 2 and their p-values are column 4.
texreg(models,
       file = "/Users/Enrijeta/Dropbox/Apps/Overleaf/Vote Method and Confidence/prq_rr_tables/table3.tex",
       caption = caption,
       caption.above = TRUE,

       custom.gof.rows = list(

         "County FE" = c("Yes", "Yes", "Yes", "Yes"),

         "Num. obs." = c(nobs(m1_pre_vm),
                         nobs(m1_pre_vm_int),
                         nobs(m2_post),
                         nobs(m2_post_int)),

         "R-squared" = c(summary(m1_pre_vm)$r.squared,
                         summary(m1_pre_vm_int)$r.squared,
                         summary(m2_post)$r.squared,
                         summary(m2_post_int)$r.squared),

         "Adjusted R-squared" = c(summary(m1_pre_vm)$adj.r.squared,
                                  summary(m1_pre_vm_int)$adj.r.squared,
                                  summary(m2_post)$adj.r.squared,
                                  summary(m2_post_int)$adj.r.squared)
       ),

       custom.coef.map = map,
       include.nobs = TRUE,
       custom.note = "%stars. standard errors in parentheses",
       dcolumn = TRUE,
       omit.coef = "county",
       custom.model.names = c("Model 1 (validated)", "Model 1int (validated)",
                              "Model 2 (validated)", "Model 2int (validated)"),
       digits = 3,
       single.row = FALSE,
       override.se = list(m1_pre_vmr[, 2],
                          m1_pre_vmr_int[, 2],
                          m2_postr[, 2],
                          m2_postr_int[, 2]),
       override.pvalues = list(m1_pre_vmr[, 4],
                               m1_pre_vmr_int[, 4],
                               m2_postr[, 4],
                               m2_postr_int[, 4]))


#==============================================================================# 
# Figure 2: Predicted values for post-election confidence and 2020 vote method #
#==============================================================================# 

# Fit the post-election interaction model used for the predictions (same
# specification as Model 4 of Table 3).
# NOTE: this overwrites the additive `m2_post` that was fitted for Table 3.
m2_post <- lm(voter_confidence_post ~ vmethod_gen20_vf*vote20_trump_pre + political_awareness + 
                pid + ideo7 + age + female + white + education + factor(county), 
              data = prepost_dta[voted20_vf==1])
summary(m2_post)

# Predicted confidence with 95% intervals for every combination of
# 2020 vote method (x) and Trump support (group)
m3 <- ggpredict(m2_post, terms = c("vmethod_gen20_vf", "vote20_trump_pre"), 
                typical = "mode", ci_level = 0.95)

m3 <- as.data.frame(m3)


# The error bars of the two groups overlap, so dodge them horizontally
pd <- position_dodge(0.5) # total dodge width of 0.5 on the x-axis


# Plot
plot2b <- ggplot(m3, aes(x = x, y = predicted, 
                         colour = group)) + 
  
  # point estimates, one shape per Trump-support group
  geom_point(aes(shape = group), position = pd, size=4) + 
  
  # suppress the separate shape legend (shapes are shown in the colour legend)
  guides(shape="none") + 
  
  # 95% confidence interval as an error bar around each point
  geom_errorbar(
    aes(ymin = conf.low, ymax = conf.high),
    position = pd, width = .01, size = 1) + 
  
  # y-axis spans the full 1-4 confidence scale with a break every 0.5
  scale_y_continuous(breaks = seq(1, 4, by = 0.5), limits = c(1, 4)) +
  xlab("") +
  ylab("Voter confidence (post-election)") + 
  scale_x_continuous(breaks=c(0, 1), 
                     labels = c("0" = "Mail", "1" = "In-person"), expand = c(0, 0)) +
  
  # point shapes in group order: filled circle, filled triangle
  scale_shape_manual(values = c(16, 17)) + 
  
  # show those same shapes in the colour legend
  guides(colour = guide_legend(override.aes = list(shape = c(16, 17)))) +
  # both groups are drawn in black; they are told apart by shape only
  scale_color_manual(
    name = "Trump support",
    values = c("0" = "black", "1" = "black")) +
  
  # black-and-white theme, no minor grid lines, legend at the bottom
  theme_bw() + theme(
    text = element_text(size = 15),
    panel.grid.minor = element_blank(),
    legend.position = "bottom", 
    axis.title = element_text(size = 10),
    axis.text.y = element_text(size = 10),
    axis.text.x = element_text(size = 12),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 10)
  )

plot2b

# Save the figure. The plot object is passed explicitly so the file does not
# depend on which plot happens to be the most recent one in the session.
ggsave(filename = "/Users/Enrijeta/Dropbox/Apps/Overleaf/Vote Method and Confidence/plots/plot2b.pdf",
       plot = plot2b, width = 6, height = 6)



#===========================================# 
# Table 4: Descriptive change in confidence #
#===========================================# 

# Direction of confidence change (-1/0/1) by vote method trajectory,
# shown as column percentages.
# NOTE(review): unlike the Table 1 crosstabs, no survey weight is passed here --
# confirm that the unweighted table is intended.
crosstab(prepost_dta$confidenc_change3, prepost_dta$vmethod_prepost,
         prop.c = TRUE)


#===============================# 
# Table 5: Change in confidence #
#===============================#

# Make trajectory 5 (in-person in 2016/2018 -> in-person in 2020) the
# reference category of the trajectory factor
prepost_dta$vmethod_prepost <- relevel(prepost_dta$vmethod_prepost, ref = "5")

# All four models are OLS on the -1/0/1 change measure with county fixed
# effects, estimated among validated 2020 voters, with coefficient tests based
# on county-clustered covariances.

## Model 1: change in confidence, validated 2020 vote method
m2_ch <- lm(
  formula = confidenc_change3 ~ vmethod_gen20_vf + vote20_trump_pre +
    political_awareness + pid + ideo7 + age + female + white + education +
    factor(county),
  data = prepost_dta[voted20_vf == 1]
)
summary(m2_ch)

vcov_county11 <- sandwich::vcovCL(m2_ch, cluster = ~county)
m2_chr <- coeftest(m2_ch, vcov. = vcov_county11)



## Model 2: change in confidence, 2020 vote method x Trump support
m2_ch_int <- lm(
  formula = confidenc_change3 ~ vmethod_gen20_vf*vote20_trump_pre +
    political_awareness + pid + ideo7 + age + female + white + education +
    factor(county),
  data = prepost_dta[voted20_vf == 1]
)
summary(m2_ch_int)

# (reuses the name vcov_county11; Model 1's tests are already stored above)
vcov_county11 <- sandwich::vcovCL(m2_ch_int, cluster = ~county)
m2_chr_int <- coeftest(m2_ch_int, vcov. = vcov_county11)




## Model 3: change in confidence, validated past and 2020 vote method
m1_ch <- lm(
  formula = confidenc_change3 ~ vmethod_prepost + vote20_trump_pre +
    political_awareness + pid + ideo7 + age + female + white + education +
    factor(county),
  data = prepost_dta[voted20_vf == 1]
)
summary(m1_ch)

vcov_county10 <- sandwich::vcovCL(m1_ch, cluster = ~county)
m1_chr <- coeftest(m1_ch, vcov. = vcov_county10)


## Model 4: change in confidence, past/2020 vote method x Trump support
m1_ch_int <- lm(
  formula = confidenc_change3 ~ vmethod_prepost*vote20_trump_pre +
    political_awareness + pid + ideo7 + age + female + white + education +
    factor(county),
  data = prepost_dta[voted20_vf == 1]
)
summary(m1_ch_int)

# (reuses the name vcov_county10; Model 3's tests are already stored above)
vcov_county10 <- sandwich::vcovCL(m1_ch_int, cluster = ~county)
m1_chr_int <- coeftest(m1_ch_int, vcov. = vcov_county10)



#==============#
# Model Output #
#==============#

# Coefficient labels for the table; terms not listed here (e.g. the county
# fixed effects) are left out of the printed table
map <- list("(Intercept)" = "(Intercept)",
            "vmethod_gen20_vf" = "In-person (GEN2020)",
            "vmethod_prepost2" = "In-person (GEN2016-2018) to Mail (GEN2020)",
            "vmethod_prepost1" = "Mail (GEN2016-2018) to Mail (GEN2020)",
            "vmethod_prepost4" = "Mail (GEN2016-2018) to In-person (GEN2020)",
            "vmethod_prepost3" = "Different method (GEN2016-2018) to Mail (GEN2020)",
            "vmethod_prepost6" = "Different method (GEN2016-2018) to In-person (GEN2020)",
            "vote20_trump_pre" = "Trump supporter",
            "vmethod_prepost2:vote20_trump_pre" = "In-person (GEN2016-2018) to Mail (GEN2020)XTrump supporter",
            "vmethod_prepost1:vote20_trump_pre" = "Mail (GEN2016-2018) to Mail (GEN2020)XTrump supporter",
            "vmethod_prepost4:vote20_trump_pre" = "Mail (GEN2016-2018) to In-person (GEN2020)XTrump supporter",
            "vmethod_prepost3:vote20_trump_pre" = "Different method (GEN2016-2018) to Mail (GEN2020)XTrump supporter",
            "vmethod_prepost6:vote20_trump_pre" = "Different method (GEN2016-2018) to In-person (GEN2020)XTrump supporter",
            "vmethod_gen20_vf:vote20_trump_pre" = "In-person (GEN2020)XTrump supporter",
            "political_awareness" = "Political awareness",
            "pid2" = "Republican",
            "pid3" = "No party affiliate",
            "ideo7" = "Ideology",
            "age" = "Age",
            "female" = "Female",
            "white" = "White",
            "education" = "Education")

# Specify list of models (coeftest objects with county-clustered SEs)
models <- list(m2_chr, m2_chr_int, m1_chr, m1_chr_int)

# Specify caption
caption <- "Change in Confidence in the 2020 General Election Survey Waves and Validated Vote Method"

# Write the LaTeX table.
# override.se / override.pvalues are two separate texreg arguments, each taking
# one vector per model. A coeftest matrix has the columns
# 1 = estimate, 2 = std. error, 3 = test statistic, 4 = p-value, so the
# clustered standard errors are column 2 and their p-values are column 4.
texreg(models,
       file = "/Users/Enrijeta/Dropbox/Apps/Overleaf/Vote Method and Confidence/prq_rr_tables/table51.tex",
       caption = caption,
       caption.above = TRUE,

       custom.gof.rows = list(

         "County FE" = c("Yes", "Yes", "Yes", "Yes"),

         # goodness-of-fit rows are taken from the underlying lm fits
         "Num. obs." = c(nobs(m2_ch),
                         nobs(m2_ch_int),
                         nobs(m1_ch),
                         nobs(m1_ch_int)),

         "R-squared" = c(summary(m2_ch)$r.squared,
                         summary(m2_ch_int)$r.squared,
                         summary(m1_ch)$r.squared,
                         summary(m1_ch_int)$r.squared),

         "Adjusted R-squared" = c(summary(m2_ch)$adj.r.squared,
                                  summary(m2_ch_int)$adj.r.squared,
                                  summary(m1_ch)$adj.r.squared,
                                  summary(m1_ch_int)$adj.r.squared)
       ),

       custom.coef.map = map,
       include.nobs = TRUE,
       custom.note = "%stars. standard errors in parentheses",
       dcolumn = TRUE,
       omit.coef = "county",
       custom.model.names = c("(1)", "(2)", "(3)", "(4)"),
       digits = 3,
       single.row = FALSE,
       override.se = list(m2_chr[, 2],
                          m2_chr_int[, 2],
                          m1_chr[, 2],
                          m1_chr_int[, 2]),
       override.pvalues = list(m2_chr[, 4],
                               m2_chr_int[, 4],
                               m1_chr[, 4],
                               m1_chr_int[, 4]))



#================================================================================# 
# Figure 3a: Predicted values for change in confidence and past/2020 vote method #
#================================================================================# 

# Fit the change-in-confidence interaction model used for the predictions
# (same specification as Model 4 of Table 5).
# NOTE: this overwrites the additive `m1_ch` that was fitted for Table 5.
m1_ch <- lm(confidenc_change3 ~ vmethod_prepost*vote20_trump_pre + political_awareness + 
              pid + ideo7 + age + female + white + education + factor(county), 
            data = prepost_dta[voted20_vf==1])
summary(m1_ch)

# Predicted change with 95% intervals for every combination of vote method
# trajectory (x) and Trump support (group)
m4 <- ggpredict(m1_ch, terms = c("vmethod_prepost", "vote20_trump_pre"), 
                typical = "mode", ci_level = 0.95)

m4 <- as.data.frame(m4)

# keep only trajectories 1 (mail->mail), 4 (mail->in-person) and
# 5 (in-person->in-person), i.e. the ones with significant interactions
m4 <- m4 %>% filter(x %in% c(1, 4, 5))

# The error bars of the two groups overlap, so dodge them horizontally
pd <- position_dodge(0.5) # total dodge width of 0.5 on the x-axis

# order of the trajectories on the x-axis (must match the labels given below)
m4$x <- factor(m4$x, levels = c("5", "4", "1"))

# Plot
plot3a <- ggplot(m4, aes(x = x, y = predicted, 
                         colour = group)) + 
  
  # point estimates, one shape per Trump-support group
  geom_point(aes(shape = group), position = pd, size=4) + 
  
  # suppress the separate shape legend (shapes are shown in the colour legend)
  guides(shape="none") + 
  
  # 95% confidence interval as an error bar around each point
  geom_errorbar(
    aes(ymin = conf.low, ymax = conf.high),
    position = pd, width = .01, size = 1) + 
  
  # y-axis spans the full -1..1 range of the change measure, break every 0.5
  scale_y_continuous(breaks = seq(-1, 1, by = 0.5), limits = c(-1, 1)) +
  xlab("") +
  ylab("Change in voter confidence (pre-post election)") + 
  scale_x_discrete(labels = c("In-person->In-person", "Mail->In-person", "Mail->Mail")) +
  
  # point shapes in group order: filled circle, filled triangle
  scale_shape_manual(values = c(16, 17)) + 
  
  # show those same shapes in the colour legend
  guides(colour = guide_legend(override.aes = list(shape = c(16, 17)))) +
  # both groups are drawn in black; they are told apart by shape only
  scale_color_manual(
    name = "Trump support",
    values = c("0" = "black", "1" = "black")) +
  
  # black-and-white theme, no minor grid lines, legend at the bottom
  theme_bw() + theme(
    text = element_text(size = 16),
    panel.grid.minor = element_blank(),
    legend.position = "bottom", 
    axis.title = element_text(size = 10),
    axis.text.y = element_text(size = 10),
    axis.text.x = element_text(size = 9),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 10)
  )

plot3a


# Save the figure. The plot object is passed explicitly so the file does not
# depend on which plot happens to be the most recent one in the session.
ggsave(filename = "/Users/Enrijeta/Dropbox/Apps/Overleaf/Vote Method and Confidence/plots/plot3a.pdf",
       plot = plot3a, width = 6, height = 6)



#=======================================#
# Appendix: Ordered logit Replication   #
#=======================================#

# Make sure the panel is a data.table (rows are filtered inside `[ ]` below)
prepost_dta <- data.table(prepost_dta)

#===============================================# 
# Table 2 (replication): Ordered Logistic Model #
#===============================================# 

library(brant)

# Factor versions of the outcomes for the ordinal models, and county as a factor
# NOTE(review): `voter_confidence4` is not created anywhere in the visible part
# of this script; presumably it is already stored in prepost_dta -- confirm.
prepost_dta[, voter_confidence_preo := factor(voter_confidence_pre)]
prepost_dta[, voter_confidence4o := factor(voter_confidence4)]
prepost_dta[, county := factor(county)]

# Model 1 replication Table 2: proportional-odds logit of pre-election
# confidence among validated 2020 voters (no county fixed effects here)
mt2_preconfidenceo <- polr(
  formula = voter_confidence_preo ~ past_vmethod_vf + vote20_trump_pre +
    political_awareness + pid + ideo7 + age + female + white + education,
  data = prepost_dta[voted20_vf == 1],
  Hess = TRUE
)

summary(mt2_preconfidenceo)

# Brant test of the parallel-regression (proportional odds) assumption.
# The test fails, which is why generalized logistic regression models are
# estimated below.
brant_test_result <- brant(mt2_preconfidenceo)

# Display the results
print(brant_test_result)


##----------------------------------------##
##----------------------------------------##
## Generalized Logistic Regression Models ##
##----------------------------------------##
##----------------------------------------##

#===================================================# 
# Table 2 (replication): Generalized Logistic Model #
#===================================================# 

library(ordinal)
library(texreg)


# Fit the Table 2 specification with ordinal::clm (logit link) on rows with
# voted20_vf == 1; same outcome and covariates as the polr model above.
# NOTE(review): no `nominal =` (or `scale =`) formula is supplied, so as written
# clm() estimates one common slope per covariate across all thresholds, i.e. the
# proportional-odds restriction is still imposed. Confirm whether a partial
# proportional-odds specification was intended for the "generalized" models.
mt2_glogit <- clm(voter_confidence_preo ~ past_vmethod_vf + vote20_trump_pre + political_awareness + 
                    pid + ideo7 + age + female + white + education, 
                  data = prepost_dta[voted20_vf==1], link = "logit")

# Display the summary of the model
summary(mt2_glogit)


# Row labels for the table, keyed by model term name; terms that are not listed
# here are left out of the table, and rows follow the order given below
map <- list(
  `(Intercept)` = "(Intercept)",
  past_vmethod_vf2 = "In-person",
  past_vmethod_vf3 = "Different vote methods",
  vote20_trump_pre = "Trump supporter",
  political_awareness = "Political awareness",
  pid2 = "Republican",
  pid3 = "No party affiliate",
  ideo7 = "Ideology",
  age = "Age",
  female = "Female",
  white = "White",
  education = "Education",
  `1|2` = "1|2",
  `2|3` = "2|3",
  `3|4` = "3|4"
)

# Models entering the table
models <- list(mt2_glogit)

# Table caption
caption <- "Generalized Logistic Models for Voter Confidence and Validated Vote Method in the 2016 and 2018 General Elections"

# Write the LaTeX table to the appendix folder
texreg(
  models,
  file = "/Users/Enrijeta/Dropbox/Apps/Overleaf/Vote Method and Confidence/appendix/appendix_tables/gologit_tab2.tex",
  custom.model.names = c("Model 1 (validated)"),
  custom.coef.map = map,
  omit.coef = "county",
  caption = caption,
  caption.above = TRUE,
  custom.note = "%stars. standard errors in parentheses",
  include.nobs = TRUE,
  dcolumn = TRUE,
  digits = 3,
  single.row = FALSE
)



#===================================================# 
# Table 3 (replication): Generalized Logistic Model #
#===================================================# 

# Convert the post-election confidence measure to a factor in place so it can be
# used as the ordinal outcome in Models 3 and 4
prepost_dta$voter_confidence_post <- factor(prepost_dta$voter_confidence_post)


# Model 1: pre-election confidence (voter_confidence_preo) on the 2020 vote
# method, rows with voted20_vf == 1
mt3_glogit1 <- clm(voter_confidence_preo ~ vmethod_gen20_vf + vote20_trump_pre + political_awareness + 
                     pid + ideo7 + age + female + white + education, 
                   data = prepost_dta[voted20_vf==1], link = "logit")

# Display the summary of the model
summary(mt3_glogit1)


# Model 2: Model 1 plus the interaction of 2020 vote method with Trump support
mt3_glogit1_int <- clm(voter_confidence_preo ~ vmethod_gen20_vf*vote20_trump_pre + political_awareness + 
                         pid + ideo7 + age + female + white + education, 
                       data = prepost_dta[voted20_vf==1], link = "logit")

# Display the summary of the model
summary(mt3_glogit1_int)


# Model 3: post-election confidence (voter_confidence_post) on the 2020 vote
# method, same covariates and sample restriction as Model 1
mt3_glogit2 <- clm(voter_confidence_post ~ vmethod_gen20_vf + vote20_trump_pre + political_awareness + 
                     pid + ideo7 + age + female + white + education, 
                   data = prepost_dta[voted20_vf==1], link = "logit")

# Display the summary of the model
summary(mt3_glogit2)


# Model 4: Model 3 plus the interaction of 2020 vote method with Trump support
mt3_glogit2_int <- clm(voter_confidence_post ~ vmethod_gen20_vf*vote20_trump_pre + political_awareness + 
                         pid + ideo7 + age + female + white + education, 
                       data = prepost_dta[voted20_vf==1], link = "logit")

# Display the summary of the model
summary(mt3_glogit2_int)


# Row labels for the table, keyed by model term name; terms that are not listed
# here are left out of the table, and rows follow the order given below
map <- list(
  `(Intercept)` = "(Intercept)",
  vmethod_gen20_vf = "In-person",
  vote20_trump_pre = "Trump supporter",
  `vmethod_gen20_vf:vote20_trump_pre` = "In-personXTrump supporter",
  political_awareness = "Political awareness",
  pid2 = "Republican",
  pid3 = "No party affiliate",
  ideo7 = "Ideology",
  age = "Age",
  female = "Female",
  white = "White",
  education = "Education",
  `1|2` = "1|2",
  `2|3` = "2|3",
  `3|4` = "3|4"
)

# Models entering the table: pre-election outcome (1, 2), then post-election (3, 4)
models <- list(mt3_glogit1, mt3_glogit1_int, mt3_glogit2, mt3_glogit2_int)

# Table caption
caption <- "Generalized Logistic Models for Voter Confidence and Validated Vote Method in the 2020 General Elections"

# Write the LaTeX table to the appendix folder
texreg(
  models,
  file = "/Users/Enrijeta/Dropbox/Apps/Overleaf/Vote Method and Confidence/appendix/appendix_tables/gologit_tab3.tex",
  custom.model.names = c("Model 1", "Model 2", "Model 3", "Model 4"),
  custom.coef.map = map,
  omit.coef = "county",
  caption = caption,
  caption.above = TRUE,
  custom.note = "%stars. standard errors in parentheses",
  include.nobs = TRUE,
  dcolumn = TRUE,
  digits = 3,
  single.row = FALSE
)


#===================================================# 
# Table 5 (replication): Generalized Logistic Model #
#===================================================# 

# Convert the change-in-confidence measure to a factor in place so it can be
# used as the ordinal outcome in the four models below
prepost_dta$confidenc_change3 <- factor(prepost_dta$confidenc_change3)

# Set 5 as the baseline level: in-person to in-person (2020)
# (assumes vmethod_prepost is already an unordered factor with a level "5" --
# relevel() stops with an error otherwise)
prepost_dta$vmethod_prepost <- relevel(prepost_dta$vmethod_prepost, ref = "5")


# Model 1: change in confidence on the pre/post vote-method combination
# (vmethod_prepost), rows with voted20_vf == 1
mt5_glogit1 <- clm(confidenc_change3 ~ vmethod_prepost + vote20_trump_pre + political_awareness + 
                     pid + ideo7 + age + female + white + education, 
                   data = prepost_dta[voted20_vf==1], link = "logit")

# Display the summary of the model
summary(mt5_glogit1)


# Model 2: Model 1 plus the interaction of vmethod_prepost with Trump support
mt5_glogit1_int <- clm(confidenc_change3 ~ vmethod_prepost*vote20_trump_pre + political_awareness + 
                         pid + ideo7 + age + female + white + education, 
                       data = prepost_dta[voted20_vf==1], link = "logit")

# Display the summary of the model
summary(mt5_glogit1_int)



# Model 3: change in confidence on the 2020 vote method only (vmethod_gen20_vf)
mt5_glogit2 <- clm(confidenc_change3 ~ vmethod_gen20_vf + vote20_trump_pre + political_awareness + 
                     pid + ideo7 + age + female + white + education, 
                   data = prepost_dta[voted20_vf==1], link = "logit")

# Display the summary of the model
summary(mt5_glogit2)


# Model 4: Model 3 plus the interaction of vmethod_gen20_vf with Trump support
mt5_glogit2_int <- clm(confidenc_change3 ~ vmethod_gen20_vf*vote20_trump_pre + political_awareness + 
                         pid + ideo7 + age + female + white + education, 
                       data = prepost_dta[voted20_vf==1], link = "logit")

# Display the summary of the model
summary(mt5_glogit2_int)


## Model output ##

# Row labels for the table, keyed by model term name; terms that are not listed
# here are left out of the table, and rows follow the order given below.
# NOTE(review): threshold rows are matched by name, so the cut-points of
# confidenc_change3 appear only if they are literally named "1|2", "2|3", "3|4"
# -- confirm against the levels of that factor.
map <- list("(Intercept)" = "(Intercept)",
            "vmethod_gen20_vf" = "In-person (GEN2020)",
            "vmethod_prepost2" = "In-person (GEN2016-2018) to Mail (GEN2020)",
            "vmethod_prepost4" = "Mail (GEN2016-2018) to In-person (GEN2020)",
            "vmethod_prepost1" = "Mail (GEN2016-2018) to Mail (GEN2020)",
            "vmethod_prepost3" = "Different method (GEN2016-2018) to Mail (GEN2020)",
            "vmethod_prepost6" = "Different method (GEN2016-2018) to In-person (GEN2020)",
            "vote20_trump_pre" = "Trump supporter",
            "vmethod_gen20_vf:vote20_trump_pre" = "In-person (GEN2020)XTrump supporter",
            "vmethod_prepost2:vote20_trump_pre" = "In-person (GEN2016-2018) to Mail (GEN2020)XTrump supporter",
            "vmethod_prepost4:vote20_trump_pre" = "Mail (GEN2016-2018) to In-person (GEN2020)XTrump supporter",
            "vmethod_prepost1:vote20_trump_pre" = "Mail (GEN2016-2018) to Mail (GEN2020)XTrump supporter",
            "vmethod_prepost3:vote20_trump_pre" = "Different method (GEN2016-2018) to Mail (GEN2020)XTrump supporter",
            "vmethod_prepost6:vote20_trump_pre" = "Different method (GEN2016-2018) to In-person (GEN2020)XTrump supporter",
            "political_awareness" = "Political awareness",
            "pid2" = "Republican",
            "pid3" = "No party affiliate",
            "ideo7" = "Ideology",
            "age" = "Age",
            "female" = "Female",
            "white" = "White",
            "education" = "Education",
            "1|2" = "1|2",
            "2|3" = "2|3",
            "3|4" = "3|4")

# Models entering the table. The column order differs from the estimation order:
# columns (1)-(2) are the 2020 vote-method models (mt5_glogit2, mt5_glogit2_int),
# columns (3)-(4) are the pre/post vote-method models (mt5_glogit1, mt5_glogit1_int).
models <- list(mt5_glogit2, mt5_glogit2_int, mt5_glogit1, mt5_glogit1_int)

# Table caption (the duplicated word "Models Models" has been removed)
caption <- "Generalized Logistic Models for Change in Confidence and Validated Vote Method, Pre- and Post- 2020 General Election Survey Waves"

# Write the LaTeX table to the appendix folder
texreg(models,
       file = "/Users/Enrijeta/Dropbox/Apps/Overleaf/Vote Method and Confidence/appendix/appendix_tables/gologit_tab51.tex",
       caption = caption,
       caption.above = TRUE,
       custom.coef.map = map,
       include.nobs = TRUE,
       custom.note = "%stars. standard errors in parentheses",
       dcolumn = TRUE,
       omit.coef = "county",
       custom.model.names = c("(1)", "(2)", "(3)", "(4)"),
       digits = 3,
       single.row = FALSE)



#==============================# 
# Appendix: Sample composition #
#==============================# 

# Load the wave-1 respondents who did not answer wave 2
load("flsurvey_dta_unmatched.RData")

# Percentage distribution of a single variable.
# deparse.level = 0 leaves the table header unlabeled, as table(df$col) does.
pct_share <- function(values) {
  prop.table(table(values, deparse.level = 0)) * 100
}


# Panel respondents
pct_share(prepost_dta$Gender)
pct_share(prepost_dta$race_voterfile)
pct_share(prepost_dta$PartyAffiliation)
pct_share(prepost_dta$age)
pct_share(prepost_dta$voter_confidence_pre)
pct_share(prepost_dta$voter_confidence_post)


# Full dataset (wave 1 + wave 2)
pct_share(flsurvey_weighted_vh$voter_confidence_pre)
pct_share(flsurvey_weighted_vh$votes_count_w21)

# Wave-1 respondents without a wave-2 response
pct_share(flsurvey_dta_unmatched$voter_confidence_pre)




#==================#
# Panel attrition  #
#==================#

# Percentage distribution of a single variable; shares are rounded to four
# decimals before scaling to percentages.
# deparse.level = 0 leaves the table header unlabeled, as table(df$col) does.
pct_rounded <- function(values) {
  round(prop.table(table(values, deparse.level = 0)), 4) * 100
}

# Pre-election survey
pct_rounded(flsurvey_weighted_vh$voter_confidence_pre)
pct_rounded(flsurvey_weighted_vh$pid)
pct_rounded(flsurvey_weighted_vh$ideo7)
pct_rounded(flsurvey_weighted_vh$age)
pct_rounded(flsurvey_weighted_vh$white)
pct_rounded(flsurvey_weighted_vh$female)
pct_rounded(flsurvey_weighted_vh$education)
pct_rounded(flsurvey_weighted_vh$political_awareness)
pct_rounded(flsurvey_weighted_vh$vote20_trump_pre)


# Post-election survey
pct_rounded(prepost_dta$voter_confidence_pre)
pct_rounded(prepost_dta$pid)
pct_rounded(prepost_dta$ideo7)
pct_rounded(prepost_dta$age)
pct_rounded(prepost_dta$white)
pct_rounded(prepost_dta$female)
pct_rounded(prepost_dta$education)
pct_rounded(prepost_dta$political_awareness)
pct_rounded(prepost_dta$vote20_trump_pre)

# Wave-1 respondents without a wave-2 response
pct_rounded(flsurvey_dta_unmatched$voter_confidence_pre)
pct_rounded(flsurvey_dta_unmatched$pid)
pct_rounded(flsurvey_dta_unmatched$ideo7)
pct_rounded(flsurvey_dta_unmatched$age)
pct_rounded(flsurvey_dta_unmatched$white)
pct_rounded(flsurvey_dta_unmatched$female)
pct_rounded(flsurvey_dta_unmatched$education)
pct_rounded(flsurvey_dta_unmatched$political_awareness)
pct_rounded(flsurvey_dta_unmatched$vote20_trump_pre)

